This page is about the trend in the number of COVID-19 cases in the US.

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## 载入程辑包:'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(rvest)
## 
## 载入程辑包:'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(ggplot2)
library(lubridate)
## 
## 载入程辑包:'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

Import data

# Cumulative case counts: skip the first two lines of the file, then
# normalise the column names (e.g. "Total Cases" -> total_cases).
covid_cum = janitor::clean_names(
  read_csv("data/covid_cumulative_cases.csv", skip = 2)
)
## Rows: 657 Columns: 3
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): State, Date
## dbl (1): Total Cases
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning in FUN(X[[i]], ...): strings not representable in native encoding will
## be translated to UTF-8
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C4>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00D6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E4>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00F6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00DF>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E6>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00D8>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00F8>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00C5>' to native encoding
## Warning in FUN(X[[i]], ...): unable to translate '<U+00E5>' to native encoding
# Daily new-case counts: skip the first two lines of the file, then
# normalise the column names (e.g. "New Cases" -> new_cases).
covid_day = janitor::clean_names(
  read_csv("data/covid_daily_cases.csv", skip = 2)
)
## Rows: 656 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): State, Date
## dbl (3): New Cases, 7-Day Moving Avg, Historic Cases
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Combine the daily and cumulative tables (matched on the raw date string)
# and build a proper Date column.
# - The date text is split into month / day / year pieces.
# - `month` becomes a factor whose levels follow `month.abb`, so its integer
#   codes are the month numbers that make_date() needs; `day` and `year`
#   remain character and are coerced to integers inside make_date().
# - Rows are sorted once, on the real Date column. Sorting on the character
#   `day` column would be lexicographic ("1", "10", "11", "2", ...).
# - The result is left grouped by year and month, so downstream code that
#   summarises per month keeps working unchanged.
covid_daily = left_join(covid_day, covid_cum, by = "date") %>% 
  select(date, new_cases, total_cases) %>% 
  separate(date, into = c("month", "day", "year")) %>% 
  mutate(
    month = factor(month, levels = month.abb),
    date = make_date(year, month, day)
  ) %>% 
  arrange(date) %>% 
  group_by(year, month)
  
# Monthly totals of new cases.
# The year/month grouping is stated here rather than inherited implicitly
# from covid_daily. `.groups = "drop_last"` keeps the result grouped by year,
# which is what summarize() did by default, without the informational message.
covid_monthly = covid_daily %>% 
  group_by(year, month) %>% 
  summarize(monthly = sum(new_cases), .groups = "drop_last")
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
# Quarterly totals of new cases.
# Each month abbreviation is mapped to its quarter label (Jan-Mar -> "q1",
# Apr-Jun -> "q2", Jul-Sep -> "q3", Oct-Dec -> "q4") through a named vector
# spliced into recode(). Monthly counts are then summed per year/quarter and
# a "<year>_<quarter>" label is added.
covid_seasonal = covid_monthly %>% 
  mutate(
    quarter = recode(
      month,
      !!!setNames(rep(c("q1", "q2", "q3", "q4"), each = 3), month.abb)
    )
  ) %>% 
  group_by(year, quarter) %>% 
  summarize(quarterly = sum(monthly)) %>% 
  mutate(date = str_c(year, quarter, sep = "_"))
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
# Save the quarterly table to disk.
write_csv(x = covid_seasonal, file = "covid_seasonal.csv")

# Number of rows in the cumulative table and in the joined daily table.
row_cum = dim(covid_cum)[1]
row_daily = dim(covid_daily)[1]

Data description

The covid_cum data frame (read from covid_cumulative_cases.csv) contains the cumulative number of COVID-19 cases in the United States over time. There are 657 days observed in total. The covid_day data frame contains the number of new COVID-19 cases reported in the United States each day. There are 656 days observed in total.

Data cleaning

We first used ‘janitor::clean_names()’ to get a uniform naming scheme. From there, we combined the new-case data frame and the cumulative-case data frame and separated each time point into month, day and year. Then we created a new variable named date that rewrites the date in the form YYYY-MM-DD. In order to compare the COVID-19 case data with the consumption data, we made a new data frame named covid_seasonal that aggregates the case data by season (quarter 1 = January - March, quarter 2 = April - June, quarter 3 = July - September, quarter 4 = October - December). Then we wrote this data frame to a new .csv file.

Plots Daily

# Daily new cases (bars, left axis) overlaid with cumulative cases (line,
# right axis).
# covid_daily is grouped by year and month, and plotly draws a lines trace as
# one separate segment per dplyr group, which breaks the cumulative curve at
# every month boundary. The grouping is therefore dropped before plotting;
# ungroup() is a no-op if the data is not grouped.
daily_fig = plot_ly(ungroup(covid_daily))

daily_fig %>% 
  add_trace(
    x = ~date, y = ~new_cases,
    type = "bar",
    yaxis = "y", name = "new"
  ) %>% 
  add_trace(
    x = ~date, y = ~total_cases,
    type = "scatter", mode = "lines",
    yaxis = "y2", name = "cumulative"
  ) %>% 
  layout(
    yaxis = list(title = "daily new cases", side = "left"),
    # The second axis is drawn on top of the first and labelled on the right.
    yaxis2 = list(title = "cumulative cases", side = "right", overlaying = "y"),
    showlegend = TRUE
  )

Quarterly (to correspond with the consumption data)

# Bar chart of total new cases per quarter, one bar per "<year>_<quarter>".
plot_ly(covid_seasonal, x = ~date, y = ~quarterly, type = "bar")